** Data reading and variable selection from raw data
** Japanese Social Stratification and Mobility 1995


** 01. Reading data **
// Each survey part is translated from Shift_JIS to Unicode, prepared and saved
// back in place; part A (0763_a.dta) is then appended to part B (0763_b.dta)
// and the combined data are written to 0763.dta.
//
// The prepared files overwrite the originals, so the preparation steps (new id,
// part A renames) are guarded: they run on a file that has not been prepared
// yet and are skipped on a file that already has been. This keeps the section
// safe to re-run.

* ---- part A ----
cap log close
clear all
set more off
cd /*insert your work directory here*/
unicode encoding set "Shift_JIS"
unicode translate 0763_a.dta
use 0763_a.dta

// New id to differentiate the respondents of part A from part B - part A: nid=ID+10000
// NOTE(review): assumes the raw respondent id variable is named ID - confirm against the data.
capture confirm variable nid, exact
if _rc {
	generate long nid = ID + 10000
}

// The two parts are identical apart from the variable names of respondent's
// education and parents' education/occupation. Part A is renamed to the names
// used in the rest of this file. Q19A only exists before the renames, so it
// is used as the "not yet renamed" marker.
capture confirm variable Q19A, exact
if !_rc {
	rename Q10_1 Q61
	rename Q21 Q12
	rename Q11A Q11A_1
	rename Q19A Q11A
	rename Q19C Q11C
	rename Q19D Q11D
	rename Q19E Q11E
	rename Q19F Q11F
	rename Q22_1 Q13_1
	rename Q22_2A Q13_2A
	rename Q22_2D Q13_2B
	rename Q22_2E Q13_2C
	rename Q22_2F Q13_2D
}
save 0763_a.dta, replace

* ---- part B ----
cap log close
clear all
set more off
cd /*insert your work directory here*/
unicode encoding set "Shift_JIS"
unicode translate 0763_b.dta
use 0763_b.dta

// New id to differentiate the respondents of part A from part B - part B: nid=ID+20000
// NOTE(review): assumes the raw respondent id variable is named ID - confirm against the data.
capture confirm variable nid, exact
if _rc {
	generate long nid = ID + 20000
}
save 0763_b.dta, replace

* ---- combine both parts ----
append using 0763_a.dta
save 0763.dta, replace

use 0763.dta
// prefix the existing value labels with their numeric codes
numlabel,add


** 02. Survey year and country (constants for the whole file) **

generate year = 1995
generate country = 392

label variable year "survey year"
// Japan: 392 (see "ISO Country Codes.pdf")
label variable country "ISO country code"


** 03. Person identifier **

// pid is a copy of nid
generate pid = nid
label variable pid "person id"


** 04. Basic demographics (sex, age, year of birth) **

generate sex = Q1_1
generate age = Q1_2A
// year of birth is derived as survey year minus reported age
generate birthyr = year - age

label variable sex "sex"
label variable age "age"
label variable birthyr "year of birth"

label define sex 1 "male" 2 "female"
label values sex sex


** 05. Siblings **

// nsibs is Q2A minus one; birth order variables are copied unchanged
// (presumably Q2A counts the respondent as well - TODO confirm against the codebook)
generate nsibs = Q2A - 1
generate birthorder = Q2B
generate sexorder = Q2C

label variable nsibs "number of siblings"
label variable birthorder "birth order"
label variable sexorder "birth order among siblings of the same sex"

// One value label is shared by the three variables.
// NOTE(review): only nsibs is shifted by -1, while birthorder and sexorder keep
// their raw codes - confirm that 98 is the don't know/no answer code for them too.
label define nsibs 98 "Don't Know/No Answer"
label values nsibs birthorder sexorder nsibs


** 06. Own education **

// educ_a: last school attended, copied from Q61
ge educ_a=Q61
lab var educ_a "last school attended"

lab def educ_a 1 "ordinary elementary_old system" 2 "higher elementary_old system" 3 "junior high/girls high_old system" ///
4 "vocational_old system" 5 "normal_old system" 6 "higher or vocational_old system/higher normal" 7 "university/graduate_old system" ///
12 "junior high" 13 "high" 14 "2-year college/College of technology" 15 "university" 16 "graduate school" 99 "don't know/no answer"

lab val educ_a educ_a

// educ_c: whether the last school was completed; Q62 codes 4 and above become 99
recode Q62 (1=1 "graduated")(2=2 "quit")(3=3 "still in school")(4/max=99 "don't know/no answer"),into(educ_c)
la var educ_c "completion of the last school"

// educ: education level used in the rest of the file
//  - graduated (1) or still in school (3): the last school attended, as is
//  - quit (2): one code below the last school attended
//  - don't know/no answer on either variable: 99
ge educ=educ_a if educ_c==1 | educ_c==3

// Each level has to be matched explicitly (educ_a==`i'); a condition that does
// not involve `i' would overwrite every respondent who quit on every pass.
forvalues i=1/7 {
	replace educ=`i'-1 if educ_a==`i' & educ_c==2
}
// NOTE(review): quitting junior high (12) is coded 7, which the label below
// calls "university/graduate_old system" - confirm this is the intended level.
replace educ=7 if educ_a==12 & educ_c==2
forvalues i=13/16 {
	replace educ=`i'-1 if educ_a==`i' & educ_c==2
}
replace educ=99 if educ_a==99 & educ_c==2
replace educ=99 if educ_c==99

lab def educ 0 "no completed formal education" 1 "ordinary elementary_old system" 2 "higher elementary_old system" ///
3 "junior high/girls high_old system" 4 "vocational_old system" 5 "normal_old system" 6 "higher or vocational_old system/higher normal" ///
7 "university/graduate_old system" 12 "junior high" 13 "high" 14 "2-year college/College of technology" 15 "university" ///
16 "graduate school" 98 "not applicable" 99 "don't know/no answer"

lab val educ educ


** 07. Parents' education (father and mother) **

generate faeduc = Q18
generate moeduc = Q12

label variable faeduc "father's education"
label variable moeduc "mother's education"

// both variables are given the value label named educ
label values faeduc moeduc educ


** 08. Own occupation (current job and first job) **

// --- employment status ---
generate empstat = Q4A
generate firstempstat = Q5A
label variable empstat "current employment status"
label variable firstempstat "first employment status"

// current and first job status use separate code lists
label define empstat ///
	1 "employer/excutive" ///
	2 "regular employee" ///
	3 "temporary employee/part-time worker" ///
	4 "dispacthed employee" ///
	5 "contract worker" ///
	6 "self-employeed/freelance" ///
	7 "family worker" ///
	8 "internal job" ///
	9 "unemployeed: looking for jobs now" ///
	10 "unemployeed: not looking for jobs now" ///
	11 "student" ///
	99 "don't know/no answer"

label define firstempstat ///
	1 "employer/excutive" ///
	2 "regular employee" ///
	3 "temporary employee/part-time worker" ///
	4 "dispacthed employee" ///
	5 "self-employeed/freelance" ///
	6 "family worker" ///
	7 "internal job" ///
	18 "never had a job" ///
	99 "don't know/no answer"

label values empstat empstat
label values firstempstat firstempstat

// --- industry ---
generate ind = Q4C
generate firstind = Q5C
label variable ind "current industry"
label variable firstind "first industry"

label define ind ///
	1 "agricultural" ///
	2 "forestry" ///
	3 "fishing" ///
	4 "mining" ///
	5 "construction" ///
	6 "manifacturing" ///
	7 "electricity/gas/heat/water" ///
	8 "transportation" ///
	9 "retail" ///
	10 "finance/insurance" ///
	11 "real estate" ///
	12 "news/broadcasting/publishing/press/movie" ///
	13 "information and communication service" ///
	14 "medical/welfare" ///
	15 "education/research" ///
	16 "law/accounting" ///
	17 "other" ///
	18 "public service" ///
	86 "unemployed" ///
	89 "student" ///
	98 "Not applicable" ///
	99 "Don't know/No answer"

label values ind firstind ind

// --- size of the firm (number of employees) ---
generate copsize = Q4D
generate firstcopsize = Q5D
label variable copsize "current job: the number of employees of the firm"
label variable firstcopsize "first job: the number of employees of the firm"

label define copsize ///
	1 "1 person" ///
	2 "2-4 people" ///
	3 "5-9 people" ///
	4 "10-29 people" ///
	5 "30-99 people" ///
	6 "100-299 people" ///
	7 "300-499 people" ///
	8 "500-999 people" ///
	9 "above 1000 people" ///
	10 "government sector" ///
	98 "not applicable" ///
	99 "don't know/no answer"

label values copsize firstcopsize copsize

// --- occupation code ---
// Occupation codes refer to the survey's aftercode; only the codes listed
// below receive a value label here.
generate occ_ISCO = Q4E
generate firstocc_ISCO = Q5E
label variable occ_ISCO "occupation/work content (ISCO code)"
label variable firstocc_ISCO "first occupation/work content (ISCO code)"

label define occ_ISCO ///
	986 "unemployed" ///
	988 "university student/2-year university student" ///
	989 "graduate student" ///
	990 "other type of student" ///
	999 "don't know/no answer"

label values occ_ISCO firstocc_ISCO occ_ISCO

// --- job title / managerial position ---
generate title = Q4F
generate firsttitle = Q5F
label variable title "current job title/managing responsiblities"
label variable firsttitle "first job title/managing responsiblities"

label define title ///
	1 "no title" ///
	2 "Group leader, Foreman" ///
	3 "Sub-section Head agency or equivalent - kakaricho" ///
	4 "Section Head, Manager or equivalent - kacho" ///
	5 "Department Head, General Manager or equivalent - bucho" ///
	6 "Director, Executive Head - shacho" ///
	8 "not applicable" ///
	9 "don't know/not available"

label values title firsttitle title


** 09. Parents' occupation **
// Parental variables reuse the value labels named empstat, ind, copsize,
// occ_ISCO and title; only mowork gets a label of its own.

// --- employment status ---
generate faempstat = Q11A
generate moempstat = Q13_2A
label variable faempstat "father current employment status"
label variable moempstat "mother current employment status"
label values faempstat moempstat empstat

// --- mother: whether she worked after marriage ---
generate mowork = Q13_1
label variable mowork "mother working status after marriage"
label define mowork ///
	1 "always been working" ///
	2 "never been working" ///
	3 "became unemployed after marriage" ///
	9 "don't know/no answer"
label values mowork mowork

// --- father: industry (no mother's industry variable is built here) ---
generate faind = Q11C
label variable faind "father current industry"
label values faind ind

// --- size of the firm ---
generate facopsize = Q11D
generate mocopsize = Q13_2B
label variable facopsize "father current job: the number of employees of the firm"
label variable mocopsize "mother current job: the number of employees of the firm"
label values facopsize mocopsize copsize

// --- occupation code (codes refer to the survey's aftercode) ---
generate faocc_ISCO = Q11E
generate moocc_ISCO = Q13_2C
label variable faocc_ISCO "father occupation/work content (ISCO code)"
label variable moocc_ISCO "mother occupation/work content (ISCO code)"
label values faocc_ISCO moocc_ISCO occ_ISCO

// --- job title / managerial position ---
generate fatitle = Q11F
generate motitle = Q13_2D
label variable fatitle "father job title/managing responsiblities"
label variable motitle "mother job title/managing responsiblities"
label values fatitle motitle title


** 10. Tabulate the Identified Variables **

// Everything between "log using" and "log close" goes to a plain-text log,
// replacing any earlier log of the same name. The ** lines below are kept as
// they are because they serve as headings in that log.
log using /*insert your log file path here*/, replace text

** Data reading and variable selection from raw data
** Japanese Social Stratification and Mobility 1995

** Sex **
tabulate sex

** Age, Birth Year **
summarize age birthyr, detail

** Siblings **
summarize nsibs birthorder sexorder, detail

** R's Own Education **
tab1 educ_a educ_c educ

** Parental Education **
tab1 faeduc moeduc

** R's Own Occupation **
tab1 empstat firstempstat ///
	ind firstind ///
	copsize firstcopsize ///
	occ_ISCO firstocc_ISCO ///
	title firsttitle

** Parental Occupation **
tab1 faempstat moempstat mowork ///
	faind ///
	facopsize mocopsize ///
	faocc_ISCO moocc_ISCO ///
	fatitle motitle

log close

** 11. Keep the identified variables only

// variables to retain, grouped by the section that built them
local v_demog  year country pid sex age birthyr
local v_sibs   nsibs birthorder sexorder
local v_educ   educ_a educ_c educ faeduc moeduc
local v_ownocc empstat firstempstat ind firstind copsize firstcopsize occ_ISCO firstocc_ISCO title firsttitle
local v_parocc faempstat moempstat mowork faind facopsize mocopsize faocc_ISCO moocc_ISCO fatitle motitle

keep `v_demog' `v_sibs' `v_educ' `v_ownocc' `v_parocc'


** 12. Save the Data File **

// written with saveold; an existing file of the same name is replaced
saveold /*insert your output file path here*/, replace



** 13. Homogenising education **
** Own Education **
// educ becomes educ_cat; two derived variables are built from it:
//   educ_yrs   - years of schooling assigned to each category
//   educ_ISCED - ISCED code assigned to each category
rename educ educ_cat

// Category 0 ("no completed formal education") is mapped to 0 years, in line
// with the father's and mother's variables. Codes 98/99 and any category not
// listed stay missing.
ge educ_yrs=0 if educ_cat==0
replace educ_yrs=6 if educ_cat==1
replace educ_yrs=8 if educ_cat==2
replace educ_yrs=10 if educ_cat==3
replace educ_yrs=11 if educ_cat==4
replace educ_yrs=14 if educ_cat==5
replace educ_yrs=14 if educ_cat==6
replace educ_yrs=16 if educ_cat==7
replace educ_yrs=9 if educ_cat==12
replace educ_yrs=12 if educ_cat==13
replace educ_yrs=14 if educ_cat==14
replace educ_yrs=16 if educ_cat==15
replace educ_yrs=20 if educ_cat==16
replace educ_yrs=. if educ_cat==98
replace educ_yrs=. if educ_cat==99
lab var educ_yrs "respondent highest education in years"

// NOTE(review): category 0 has no ISCED code here (nor in the parents'
// variables), so it stays missing - confirm whether a code should be assigned.
ge educ_ISCED=100 if educ_cat==1
replace educ_ISCED=100 if educ_cat==2
replace educ_ISCED=300 if educ_cat==3
replace educ_ISCED=300 if educ_cat==4
replace educ_ISCED=300 if educ_cat==5
replace educ_ISCED=500 if educ_cat==6
replace educ_ISCED=600 if educ_cat==7
replace educ_ISCED=244 if educ_cat==12
replace educ_ISCED=344 if educ_cat==13
replace educ_ISCED=500 if educ_cat==14
replace educ_ISCED=665 if educ_cat==15
replace educ_ISCED=767 if educ_cat==16
replace educ_ISCED=. if educ_cat==98
replace educ_ISCED=. if educ_cat==99
lab var educ_ISCED "respondent highest education in ISCED code"

** Parents Education **
// Same conversion for father (fa) and mother (ma): the category variable is
// renamed to <prefix>educ_cat, then years of schooling and ISCED codes are
// derived from it. Categories that are not listed stay missing.

generate faeduc_flag = 1

rename faeduc faeduc_cat
rename moeduc maeduc_cat

// years of schooling (father first, then mother)
foreach p in fa ma {
	local who = cond("`p'" == "fa", "father", "mother")

	generate `p'educ_yrs = 0 if `p'educ_cat == 0
	replace `p'educ_yrs = 6 if `p'educ_cat == 1
	replace `p'educ_yrs = 8 if `p'educ_cat == 2
	replace `p'educ_yrs = 10 if `p'educ_cat == 3
	replace `p'educ_yrs = 11 if `p'educ_cat == 4
	replace `p'educ_yrs = 14 if inlist(`p'educ_cat, 5, 6, 14)
	replace `p'educ_yrs = 16 if inlist(`p'educ_cat, 7, 15)
	replace `p'educ_yrs = 9 if `p'educ_cat == 12
	replace `p'educ_yrs = 12 if `p'educ_cat == 13
	replace `p'educ_yrs = 20 if `p'educ_cat == 16
	// not applicable / don't know carry no years value
	replace `p'educ_yrs = . if inlist(`p'educ_cat, 98, 99)
	label variable `p'educ_yrs "`who''s education in years"
}

// ISCED codes (father first, then mother); category 0 is not assigned a code
foreach p in fa ma {
	local who = cond("`p'" == "fa", "father", "mother")

	generate `p'educ_ISCED = 100 if inlist(`p'educ_cat, 1, 2)
	replace `p'educ_ISCED = 300 if inlist(`p'educ_cat, 3, 4, 5)
	replace `p'educ_ISCED = 500 if inlist(`p'educ_cat, 6, 14)
	replace `p'educ_ISCED = 600 if `p'educ_cat == 7
	replace `p'educ_ISCED = 244 if `p'educ_cat == 12
	replace `p'educ_ISCED = 344 if `p'educ_cat == 13
	replace `p'educ_ISCED = 665 if `p'educ_cat == 15
	replace `p'educ_ISCED = 767 if `p'educ_cat == 16
	// not applicable / don't know carry no ISCED code
	replace `p'educ_ISCED = . if inlist(`p'educ_cat, 98, 99)
	label variable `p'educ_ISCED "`who' highest education in ISCED code"
}

** 14. Homogenising siblings **

// cutoff flag: constant 99, labelled "no cutoff"
generate nsibs_flag = 99
label variable nsibs_flag "cutoff of total number of siblings"
label define nsib_flag 99 "no cutoff"
label values nsibs_flag nsib_flag

// code 98 (labelled don't know/no answer) becomes system missing
recode nsibs (98 = .)

// separate numbers of brothers and sisters are not available


** 15. Tabulate education and sibling variables **
// one-way tables, in the order: demographics, education, siblings
tab1 sex age birthyr ///
	educ_cat educ_yrs faeduc_cat faeduc_yrs maeduc_cat maeduc_yrs faeduc_flag ///
	nsibs nsibs_flag


** 16. Save the Data File **

// written with saveold; an existing file of the same name is replaced
saveold /*insert your output file path here*/, replace

